Background

This document is to provide some plotting examples for reference.


Example #1: US State and County Maps

The package usmap contains maps of US states and counties. There is also some associated data available about state and county demographics.

Example code includes:

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Population by county
data(countypop, package="usmap")
# Population by state
data(statepop, package="usmap")
# Poverty rate by county
data(countypov, package="usmap")
# Poverty rate by state
data(statepov, package="usmap")
# Population of largest city by state
data(citypop, package="usmap")
# Location of earthquakes
data(earthquakes, package="usmap")


# Included datasets
countypop
## # A tibble: 3,142 x 4
##    fips  abbr  county          pop_2015
##    <chr> <chr> <chr>              <dbl>
##  1 01001 AL    Autauga County     55347
##  2 01003 AL    Baldwin County    203709
##  3 01005 AL    Barbour County     26489
##  4 01007 AL    Bibb County        22583
##  5 01009 AL    Blount County      57673
##  6 01011 AL    Bullock County     10696
##  7 01013 AL    Butler County      20154
##  8 01015 AL    Calhoun County    115620
##  9 01017 AL    Chambers County    34123
## 10 01019 AL    Cherokee County    25859
## # ... with 3,132 more rows
statepop
## # A tibble: 51 x 4
##    fips  abbr  full                 pop_2015
##    <chr> <chr> <chr>                   <dbl>
##  1 01    AL    Alabama               4858979
##  2 02    AK    Alaska                 738432
##  3 04    AZ    Arizona               6828065
##  4 05    AR    Arkansas              2978204
##  5 06    CA    California           39144818
##  6 08    CO    Colorado              5456574
##  7 09    CT    Connecticut           3590886
##  8 10    DE    Delaware               945934
##  9 11    DC    District of Columbia   672228
## 10 12    FL    Florida              20271272
## # ... with 41 more rows
countypov
## # A tibble: 3,142 x 4
##    fips  abbr  county          pct_pov_2014
##    <chr> <chr> <chr>                  <dbl>
##  1 01001 AL    Autauga County          13.1
##  2 01003 AL    Baldwin County          13  
##  3 01005 AL    Barbour County          25.4
##  4 01007 AL    Bibb County             18.1
##  5 01009 AL    Blount County           17.5
##  6 01011 AL    Bullock County          35.1
##  7 01013 AL    Butler County           25  
##  8 01015 AL    Calhoun County          20.5
##  9 01017 AL    Chambers County         21.3
## 10 01019 AL    Cherokee County         18.6
## # ... with 3,132 more rows
statepov
## # A tibble: 51 x 4
##    fips  abbr  full                 pct_pov_2014
##    <chr> <chr> <chr>                       <dbl>
##  1 01    AL    Alabama                      19.2
##  2 02    AK    Alaska                       11.4
##  3 04    AZ    Arizona                      18.2
##  4 05    AR    Arkansas                     18.7
##  5 06    CA    California                   16.4
##  6 08    CO    Colorado                     12.1
##  7 09    CT    Connecticut                  10.8
##  8 10    DE    Delaware                     13  
##  9 11    DC    District of Columbia         18.4
## 10 12    FL    Florida                      16.6
## # ... with 41 more rows
citypop
## # A tibble: 51 x 6
##       lon   lat state                abbr  most_populous_city city_pop
##     <dbl> <dbl> <chr>                <chr> <chr>                 <dbl>
##  1  -86.8  33.6 Alabama              AL    Birmingham           212237
##  2 -150.   61.2 Alaska               AK    Anchorage            291826
##  3 -112.   33.4 Arizona              AZ    Phoenix             1445632
##  4  -92.3  34.7 Arkansas             AR    Little Rock          193524
##  5 -118.   34.0 California           CA    Los Angeles         3792621
##  6 -105.   39.8 Colorado             CO    Denver               600158
##  7  -73.2  41.2 Connecticut          CT    Bridgeport           144229
##  8  -75.6  39.8 Delaware             DE    Wilmington            70851
##  9  -77.0  38.9 District of Columbia DC    Washington           693972
## 10  -81.7  30.3 Florida              FL    Jacksonville         880619
## # ... with 41 more rows
earthquakes
## # A tibble: 2,254 x 3
##      lon   lat   mag
##    <dbl> <dbl> <dbl>
##  1 -118.  35.7  2.79
##  2 -118.  36.3  3.1 
##  3 -118.  36.2  2.74
##  4 -125.  40.4  2.77
##  5 -118.  35.7  2.59
##  6 -118.  35.7  2.82
##  7 -118.  35.7  2.78
##  8 -124.  40.3  3.42
##  9 -124.  41.7  2.5 
## 10 -119.  35.3  2.58
## # ... with 2,244 more rows
# Basic, empty US maps
usmap::plot_usmap(regions="states")

usmap::plot_usmap(regions="counties")

# Basic, empty US maps subsetted to an area
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM")
                  )

usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"))

# Basic, subsetted state map with poverty rates included
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM"), 
                  values="pct_pov_2014", data=statepov, labels=TRUE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by Western and Mountain States")

# Basic, subsetted county map with poverty rates included
usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"),
                  values="pct_pov_2014", data=countypov, labels=FALSE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by County in Great Lakes States")

Example #2: Converting and adding lat/lon data

The latitude and longitude data can be converted to a form suitable for usmap by using the usmap_transform function.

Example code includes:

# Transform the earthquakes data
trQuakes <- usmap::usmap_transform(earthquakes)
str(trQuakes)
## 'data.frame':    2254 obs. of  5 variables:
##  $ lon  : num  -118 -118 -118 -125 -118 ...
##  $ lat  : num  35.7 36.3 36.2 40.4 35.7 ...
##  $ mag  : num  2.79 3.1 2.74 2.77 2.59 2.82 2.78 3.42 2.5 2.58 ...
##  $ lon.1: num  -1575319 -1632293 -1595342 -2065325 -1574008 ...
##  $ lat.1: num  -872720 -785212 -811800 -197614 -867723 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trQuakes, aes(x=lon.1, y=lat.1, size=mag), alpha=0.4) + 
    labs(title="Earthquakes of Magnitude 2.5+ (H1 2019)")

# Transform the largest city data
trCity <- usmap::usmap_transform(citypop)
str(trCity)
## 'data.frame':    51 obs. of  8 variables:
##  $ lon               : num  -86.8 -112.1 -92.3 -118.2 -104.9 ...
##  $ lat               : num  33.6 33.5 34.7 34 39.8 ...
##  $ state             : chr  "Alabama" "Arizona" "Arkansas" "California" ...
##  $ abbr              : chr  "AL" "AZ" "AR" "CA" ...
##  $ most_populous_city: chr  "Birmingham" "Phoenix" "Little Rock" "Los Angeles" ...
##  $ city_pop          : num  212237 1445632 193524 3792621 600158 ...
##  $ lon.1             : num  1220905 -1120927 702478 -1673156 -417275 ...
##  $ lat.1             : num  -1165156 -1202575 -1107534 -1034842 -570173 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trCity, aes(x=lon.1, y=lat.1, size=city_pop)) + 
    labs(title="Largest City by State")

Example #3: Filtering and coloring by region

The census region definitions are included, and can be used to filter or color the maps.

Example code includes:

# Filter the map to include only new_england, mid_atlantic, and south_atlantic
usmap::plot_usmap(regions="states", 
                  include=c(usmap::.new_england, usmap::.mid_atlantic, usmap::.south_atlantic)
                  )

# Create regions data for US states
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0)))
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_discrete("Midwest") + 
    labs(title="Midwest Region US States")

# Enhanced Coloring and Labelling
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States")

Example #4: Labelling geographies

Since usmap is built on ggplot2, the standard techniques from ggplot2 can be used to enhance the geography labelling. Further, centroids for the geographies are available in loadable files.

Example code includes:

# Base state map labelled with defaults
usmap::plot_usmap(regions="states", labels=TRUE, label_color="red")

# Base county map labelled with defaults
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), labels=TRUE, label_color="grey")

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Add state labels using geom_text
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0))) %>%
    left_join(stCenter %>% select(x, y, full, fips) %>% rename(fname=full)) %>%
    mutate(fname=ifelse(fname=="District of Columbia", "DC", str_replace_all(fname, " ", "\n")))
## Joining, by = "fips"
usmap::plot_usmap(regions="states", data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States") + 
    geom_text(data=filter(regionData, region==1), aes(x=x, y=y, label=fname), size=2.5)

# Add county labels using geom_text
regionData <- usmap::countypop %>%
    mutate(region=as.factor(case_when(abbr=="OK" ~ 1, abbr=="TX" ~ 2, TRUE ~ 0))) %>%
    left_join(ctCenter %>% select(x, y, county, fips) %>% rename(cname=county)) %>%
    mutate(cname=str_replace_all(str_replace(cname, " County", ""), " ", "\n"))
## Joining, by = "fips"
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), 
                  data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("red", "orange"), "", labels=c("Oklahoma", "Texas")) + 
    labs(title="Texas and Oklahoma Counties") + 
    geom_text(data=filter(regionData, abbr %in% c("TX", "OK")), 
              aes(x=x, y=y, label=cname), size=2.5, 
              color=ifelse(pull(filter(regionData, abbr %in% c("TX", "OK")), abbr)=="OK", "white", "black")
              )

Example #5: Adding population centers

Separate data exists for key population centers, which can be loaded and then added to maps.

Example code includes:

# Transform the largest city data
str(maps::us.cities)
## 'data.frame':    1005 obs. of  6 variables:
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
trCity <- usmap::usmap_transform(select(maps::us.cities, long, lat, everything())) %>% 
    mutate(useName=str_replace_all(str_sub(name, 1, -4), " ", "\n"))
str(trCity)
## 'data.frame':    1005 obs. of  9 variables:
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
##  $ long.1     : num  24544 1533716 -1931842 1498524 2096820 ...
##  $ lat.1      : num  -1392670 -262400 -543195 -1350290 82431 ...
##  $ useName    : chr  "Abilene" "Akron" "Alameda" "Albany" ...
# Define a key region for plotting
rgnPlot <- c(usmap::.west_south_central, usmap::.east_south_central)
popFilter <- 100000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("South Central Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot the full nation for cities of 250k +
rgnPlot <- c(usmap::.midwest_region, usmap::.northeast_region, 
             usmap::.south_region, usmap::.west_region
             )
popFilter <- 250000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("US Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot cities by name for the Four Corners region
rgnPlot <- c("UT", "CO", "NM", "AZ")
popFilter <- 125000

# Add cities as a layer to the state map (points)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Add cities as a layer to the state map (text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_text(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName)
               )

popFilter <- 50000
popFilter2 <- 250000

# Add cities as a layer to the state map (points and text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               ) + 
    geom_text(data=filter(trCity, pop >= popFilter2, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName), color="red", show.legend=FALSE
               )

Example #6: Custom coloring geographies

Using scale_fill_manual(), custom colors can be created by geography.

Example code includes:

# Basic county population map with continuous colors
usmap::countypop %>% 
    filter(abbr %in% c("OH", "IN", "KY")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n")) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (000)") + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County")

# Custom county population map with colors - red for Indiana, blue for Kentucky, grey for Ohio
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), popLabels)

usmap::countypop %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

# Custom county poverty rate map with colors - red for Indiana, blue for Kentucky, grey for Ohio
povBucket <- c(0, 15, 30)
povLabels <- sapply(1:(length(povBucket)-1), FUN=function(x){paste0(povBucket[x], "-", povBucket[x+1])})
povLabels <- c(povLabels, paste0(povBucket[length(povBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), povLabels)

usmap::countypov %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pct_pov_2014, povBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(povBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Poverty Rate (%)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Poverty Rate by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

Example #7: Custom labeling of key geographies

The above techniques can be combined for custom labeling of key geographies.

Example code includes:

# Basic state population data
stateData <- usmap::statepop %>% 
    mutate(pop=round(pop_2015/1000000, 1), 
           name=ifelse(full=="District of Columbia", "DC", str_replace(full, " ", "\n")), 
           lab=paste0(abbr, "\n(", pop, ")\n")
           )

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Grab centroids for top 5 states
top5State <- stateData %>%
    top_n(5, pop) %>% 
    left_join(select(stCenter, x, y, fips))
## Joining, by = "fips"
# Plot state population with continuous colors and custom labels
stateData %>% 
    usmap::plot_usmap(regions="states", data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (millions)") + 
    labs(title="Population by State", subtitle="Top 5 in millions") + 
    geom_text(data=top5State, aes(x=x, y=y, label=lab), color="white", size=4, fontface="bold")

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Custom county population map with colors - red for Wisconsin, blue for Michigan
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("MI", "WI"), each=3), popLabels)

# Grab county data for counties exceeding the top popBucket
ctyData <- usmap::countypop %>%
    filter(abbr %in% c("MI", "WI")) %>%
    mutate(pop=round(pop_2015/1000, 0), 
           name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           lab=paste0(name, "\n(", pop, ")\n")
           )

topCounty <- ctyData %>%
    filter(pop >= max(popBucket)) %>%
    left_join(select(ctCenter, x, y, fips))
## Joining, by = "fips"
# Create county population map
usmap::countypop %>% 
    filter(abbr %in% c("MI", "WI")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="WI", 0, abbr=="MI", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("WI", "MI"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    geom_text(data=topCounty, aes(x=x, y=y, label=lab), size=3, fontface="bold", color="white") +
    labs(title="Michigan and Wisconsin - Population by County", subtitle="Labelled Pop. (000) for 500k+") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3)) + 
    theme(panel.background=element_rect(color="black", fill="lightgrey"))

Example #8: Plotting Weather Data (Temperatures and Dew Points)

The ggridges package has weather data for Lincoln, NE in the data file ‘lincoln_weather’. The data are captured once per day for 366 days of 2016. Simple plots can be made of the average temperatures and dew points.

Example code includes:

data(lincoln_weather, package="ggridges")
str(lincoln_weather, give.attr=FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  24 variables:
##  $ CST                         : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ Max Temperature [F]         : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ Mean Temperature [F]        : int  24 23 23 17 29 33 30 25 9 11 ...
##  $ Min Temperature [F]         : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ Max Dew Point [F]           : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ Mean Dew Point [F]          : int  13 14 15 13 25 32 30 22 4 5 ...
##  $ Min Dewpoint [F]            : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ Max Humidity                : int  88 100 92 92 96 100 100 92 87 87 ...
##  $ Mean Humidity               : int  68 72 73 82 83 91 96 85 77 75 ...
##  $ Min Humidity                : int  47 44 54 72 70 82 92 78 67 63 ...
##  $ Max Sea Level Pressure [In] : num  30.5 30.4 30.5 30.5 30.2 ...
##  $ Mean Sea Level Pressure [In]: num  30.4 30.3 30.4 30.4 30.1 ...
##  $ Min Sea Level Pressure [In] : num  30.3 30.2 30.3 30.2 30 ...
##  $ Max Visibility [Miles]      : int  10 10 10 10 10 10 9 10 10 10 ...
##  $ Mean Visibility [Miles]     : int  10 10 10 9 8 4 3 6 9 10 ...
##  $ Min Visibility [Miles]      : int  10 10 10 6 5 0 0 2 5 10 ...
##  $ Max Wind Speed [MPH]        : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ Mean Wind Speed[MPH]        : int  9 6 5 7 13 7 7 16 14 5 ...
##  $ Max Gust Speed [MPH]        : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ Precipitation [In]          : chr  "0" "0" "0" "0" ...
##  $ CloudCover                  : int  0 0 0 1 4 8 8 8 5 0 ...
##  $ Events                      : chr  NA NA NA NA ...
##  $ WindDir [Degrees]           : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ Month                       : Factor w/ 12 levels "December","November",..: 12 12 12 12 12 12 12 12 12 12 ...
# Extract temperature and dew point data
tdData <- lincoln_weather %>%
    select(CST, maxT=`Max Temperature [F]`, minT=`Min Temperature [F]`, meanT=`Mean Temperature [F]`, 
           maxD=`Max Dew Point [F]`, minD=`Min Dewpoint [F]`, meanD=`Mean Dew Point [F]`
           ) %>%
    mutate(date=as.Date(CST))
str(tdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  8 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxT : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ minT : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ maxD : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ minD : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Plot temperatures by day
tdData %>%
    select(date, maxT, meanT, minT) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

# Plot dew points by day
tdData %>%
    select(date, maxD, meanD, minD) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Create an XTS for temperature data
tdXTS <- xts(select(tdData, minT, meanT, maxT), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Temperature Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Temperature Average (Lincoln, NE 2016)")

# Create an XTS for dew-point data
tdXTS <- xts(select(tdData, minD, meanD, maxD), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Dew Point Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Dew Point Average (Lincoln, NE 2016)")

Example #9: Combining xts and ggplot2

The xts package is good for working with time series data, while ggplot2 is strong for customizing plots. The packages can be combined in using the weather data.

Example code includes:

# Create an XTS for temperature and dewpoint data
tdXTS <- xts(select(tdData, minT, meanT, maxT, minD, meanD, maxD), order.by=tdData$date)

# Use xts for monthly average and ggplot2 for plotting
basePlot <- tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL) %>% 
    ggplot(aes(x=date-lubridate::days(15))) + 
    geom_ribbon(aes(ymin=minT, ymax=maxT), color="lightblue", fill="lightblue", alpha=0.5) +
    geom_line(aes(y=meanT), color="blue", lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Monthly Avg. Temperature (F)"
         )
basePlot

# Add labelling for the three elements
hiMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[3]
loMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[9]
muMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[6]
hiPoint <- c(60, 75)
loPoint <- c(45, 25)
muPoint <- c(72.5, 45)

labFrame <- data.frame(x=c(hiMonth, loMonth, muMonth), 
                       yend=c(hiPoint[1], loPoint[1], muPoint[1]),
                       y=c(hiPoint[2], loPoint[2], muPoint[2]), 
                       text=c("Avg. Monthly High", "Avg. Monthly Low", "Avg. Monthly Mean")) %>%
    mutate(xend=x)

basePlot + 
    geom_segment(data=labFrame, aes(x=x, y=y, xend=xend, yend=yend), arrow=arrow()) + 
    geom_text(data=labFrame, aes(x=x, y=y+c(5, -5, -5), label=text), fontface="bold", size=4)

# Can also create and plot a custom rolling average
baseData <- tdXTS %>%
    data.frame(date=index(.), row.names=NULL)

base7Day <- rollapply(tdXTS, 7, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

base30Day <- rollapply(tdXTS, 30, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

plotFrame <- bind_rows(baseData, base7Day, base30Day, .id="rolling") %>%
    mutate(rollLabel=case_when(rolling==1 ~ "Daily", 
                               rolling==2 ~ "7 Day Rolling", 
                               rolling==3 ~ "30 Day Rolling",
                               TRUE ~ "ERROR"
                               )
           )
            
plotFrame %>%
    ggplot(aes(x=date)) + 
    geom_line(aes(y=meanT, color=rollLabel, group=rollLabel), lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Daily Avg. Temperature (F)"
         )
## Warning: Removed 35 rows containing missing values (geom_path).

Example #10: Plotting Weather Data (Humidity)

Humidity data are also available in the lincoln_weather dataset. There is a relationship between temperature, dewpoint, and humidity.

Example code includes:

htdData <- lincoln_weather %>%
    select(CST, meanT=`Mean Temperature [F]`, meanD=`Mean Dew Point [F]`, meanH=`Mean Humidity`) %>%
    mutate(date=as.Date(CST), month=lubridate::month(date))
str(htdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ meanH: int  68 72 73 82 83 91 96 85 77 75 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
##  $ month: num  1 1 1 1 1 1 1 1 1 1 ...
# Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

htdData %>%
    filter((meanH < 25) | is.na(meanH))
## # A tibble: 2 x 6
##   CST       meanT meanD meanH date       month
##   <chr>     <int> <int> <int> <date>     <dbl>
## 1 2016-2-27    50    21     0 2016-02-27     2
## 2 2016-2-28    44    NA    NA 2016-02-28     2
htdData <- htdData %>%
    filter(!((meanH < 25) | is.na(meanH)))

# Updated Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD)) + 
    geom_histogram() + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

htdData %>% 
    filter(meanD >= meanT)
## # A tibble: 2 x 6
##   CST        meanT meanD meanH date       month
##   <chr>      <int> <int> <int> <date>     <dbl>
## 1 2016-1-7      30    30    96 2016-01-07     1
## 2 2016-11-27    37    39    85 2016-11-27    11
htdData <- htdData %>%
    filter(meanT >= meanD)

# Updated Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD, y=..density..)) + 
    geom_histogram(binwidth=1) + 
    geom_density(color="red") + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Proportion")

# Average humidity by month
htdData %>%
    group_by(month) %>%
    summarize(meanH=mean(meanH, na.rm=TRUE)) %>%
    ggplot(aes(x=as.factor(month), y=meanH)) + 
    geom_col() + 
    labs(title="Average Humidity by Month", subtitle="Lincoln, NE (2016)", x="Month", y="Mean Humidity (%)")

# Relationship between temperature and dewpoint
htdData %>%
    ggplot(aes(x=meanD, y=meanT)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )

# Relationship between dewpoint depression and humidity
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    ggplot(aes(x=dpD, y=meanH)) + 
    geom_point() + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Mean Humidity (%)"
         )

# Relationship between temperature and dewpoint and humidity
humInts <- c(0, 50, 60, 70, 80)
humLabel <- sapply(1:(length(humInts)-1), FUN=function(x) { paste0(humInts[x], "-", humInts[x+1]) })
humLabel <- c(humLabel, paste0(humInts[length(humInts)], "+"))

htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE) +
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. dewpoint
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth() +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Dewpoint Depression (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. temperature
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanT, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE, method="lm") +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Temperature (F)", y="Dewpoint Depression (F)"
         )

# Linear regression for temperature, dewpoint, and humidity
htdReg <- htdData %>%
    mutate(dpD=meanT-meanD) %>%
    lm(meanH ~ meanT + dpD, data=.)
summary(htdReg)
## 
## Call:
## lm(formula = meanH ~ meanT + dpD, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.3569  -2.5472  -0.1894   2.7285  14.4934 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 82.89890    0.83037  99.833  < 2e-16 ***
## meanT        0.09771    0.01403   6.965 1.57e-11 ***
## dpD         -1.79530    0.04811 -37.317  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.869 on 360 degrees of freedom
## Multiple R-squared:  0.796,  Adjusted R-squared:  0.7949 
## F-statistic: 702.4 on 2 and 360 DF,  p-value: < 2.2e-16
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    mutate(predH=predict(htdReg, newdata=.)) %>%
    ggplot(aes(x=predH, y=meanH)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Predicted Humidity (%)", y="Actual Humidity (%)"
         )

Example #11: Plotting Weather Data (Wind)

Wind data (speed, gust, direction) are also available in the lincoln_weather dataset..

Example code includes:

# Extract wind data
wdData <- lincoln_weather %>%
    select(CST, maxW=`Max Wind Speed [MPH]`, maxG=`Max Gust Speed [MPH]`, meanW=`Mean Wind Speed[MPH]`, 
           dirW=`WindDir [Degrees]`
           ) %>%
    mutate(date=as.Date(CST))
str(wdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxW : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ maxG : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ meanW: int  9 6 5 7 13 7 7 16 14 5 ...
##  $ dirW : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Manage missing data
wdData[!complete.cases(wdData), ]
## # A tibble: 6 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-2-6      18    NA     5   241 2016-02-06
## 2 2016-2-28     36    45    18    NA 2016-02-28
## 3 2016-5-31     17    NA    11   319 2016-05-31
## 4 2016-6-18     NA    NA    NA    -1 2016-06-18
## 5 2016-6-19     NA    NA    NA    -1 2016-06-19
## 6 2016-12-19    17    NA     9   209 2016-12-19
wdData <- wdData %>%
    filter(dirW != -1, !is.na(dirW)) %>%
    mutate(maxG=ifelse(is.na(maxG), maxW, maxG))
summary(wdData)
##      CST                 maxW             maxG            meanW      
##  Length:363         Min.   :  8.00   Min.   : 10.00   Min.   : 0.00  
##  Class :character   1st Qu.: 16.00   1st Qu.: 21.00   1st Qu.: 6.00  
##  Mode  :character   Median : 21.00   Median : 26.00   Median : 8.00  
##                     Mean   : 21.71   Mean   : 27.93   Mean   : 9.11  
##                     3rd Qu.: 25.00   3rd Qu.: 33.00   3rd Qu.:11.00  
##                     Max.   :131.00   Max.   :143.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.5   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.8   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Manage very high wind data
wdData[wdData$maxG >= 60, ]
## # A tibble: 3 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-3-23     45    61    25    20 2016-03-23
## 2 2016-6-17    131   143    20   170 2016-06-17
## 3 2016-12-25    51    64    19   139 2016-12-25
wdData <- wdData %>%
    filter(maxG <= 80)
summary(wdData)
##      CST                 maxW           maxG           meanW      
##  Length:362         Min.   : 8.0   Min.   :10.00   Min.   : 0.00  
##  Class :character   1st Qu.:16.0   1st Qu.:21.00   1st Qu.: 6.00  
##  Mode  :character   Median :21.0   Median :26.00   Median : 8.00  
##                     Mean   :21.4   Mean   :27.61   Mean   : 9.08  
##                     3rd Qu.:25.0   3rd Qu.:33.00   3rd Qu.:11.00  
##                     Max.   :51.0   Max.   :64.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.2   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.9   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Density of wind speeds
wdData %>%
    select(date, meanW, maxW, maxG) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=value, fill=name)) + 
    geom_density(alpha=0.5) + 
    scale_fill_discrete(name="Wind Speed [MPH]", labels=c("Max Gust", "Max", "Mean")) + 
    labs(title="Lincoln, NE (2016) Wind Speeds", y="Density", x="Wind Speed [MPH]")

# Density of wind direction
wdData %>%
    select(date, dirW) %>%
    ggplot(aes(x=dirW)) + 
    geom_density(alpha=0.5, fill="blue") + 
    labs(title="Winds are mainly from the S and NW", subtitle="Lincoln, NE (2016)", 
         y="Density", x="Wind Direction"
         )

# Wind speed and direction
wdData %>% 
    ggplot(aes(x=meanW, y=dirW)) + 
    geom_point(alpha=0.25) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Mean Wind Speed", x="Mean Wind Speed [MPH]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)

# Wind speed and direction as factors
windDirs <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")
windSpeeds <- c(0, 5, 10, 15)
windLabels <- sapply(1:(length(windSpeeds)-1), FUN=function(x){ paste0(windSpeeds[x], "-", windSpeeds[x+1]) })
windLabels <- c(windLabels, paste0(windSpeeds[length(windSpeeds)], "+"))
wdData <- wdData %>% 
    mutate(wd=factor(floor(((dirW+22.5)/45) %% 8), levels=0:7, labels=windDirs), 
           ws=factor(findInterval(meanW, windSpeeds), levels=length(windSpeeds):1, labels=rev(windLabels))
           )

# Summary of interaction between wind speed and wind direction 
wdData %>%
    group_by(wd) %>% 
    summarize(n=n(), avgMean=mean(meanW), avgMax=mean(maxW), avgGust=mean(maxG))
## # A tibble: 8 x 5
##   wd        n avgMean avgMax avgGust
##   <fct> <int>   <dbl>  <dbl>   <dbl>
## 1 N        51   10.1    21.4    27.7
## 2 NE       18    8.17   19.4    24.8
## 3 E        16    7.44   18.9    24.6
## 4 SE       69    8.01   19.6    25.5
## 5 S        73    8.89   20.6    26.8
## 6 SW       26    6.96   20.3    26.2
## 7 W        29    8.10   20.4    26.5
## 8 NW       80   11.1    25.4    32.2
table(wdData$wd, wdData$ws)
##     
##      15+ 10-15 5-10 0-5
##   N    8    14   25   4
##   NE   1     7    6   4
##   E    0     3   12   1
##   SE   3    15   42   9
##   S    5    23   43   2
##   SW   0     6   15   5
##   W    3     4   18   4
##   NW  17    31   28   4
# Graph of wind speed and wind direction
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction")

# Graph of wind speed and wind direction (polar coordinates)
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction") + 
    coord_polar(start=-0.4)

Example #12: Archived granular weather Data (METAR)

Iowa State has a great database of archived weather data, including the historical METAR data (meteorological aerodrome report) for a number of reporting stations.

METAR include information on visibility, wind, temperature, dew point, precipitation, clouds, barometric pressure, and other features that may impact safe aviation.

The data for station KLNK (Lincoln, NE airport) was saved as a CSV from Iowa State

Some processing is required before using the METAR data:

Example code includes:

# Load METAR data
klnk <- readr::read_csv("./RInputFiles/metar_klnk_2016.txt", na=c("", "NA", "M"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
str(klnk, give.attr=FALSE)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
# Filter to only data that ends with times ending in 54Z
metarKLNK <- klnk %>%
    filter(str_detect(metar, "54Z"))
dim(metarKLNK)
## [1] 8813   29
# There should be 24*368=8832 records, so there are a handful (19) of missing METAR observations
minDate <- min(metarKLNK$valid)
expDate <- minDate + lubridate::hours(0:(24*368 - 1))

# Observations expected but not recorded
as.POSIXct(setdiff(expDate, metarKLNK$valid), origin="1970-01-01", tz="UTC")
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
# Observations recorded but not expected
setdiff(metarKLNK$valid, expDate)
## numeric(0)
# Confirmation of uniqueness
length(unique(metarKLNK$valid)) == length(metarKLNK$valid)
## [1] TRUE
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)

mtxWind <- metarKLNK %>%
    pull(metar) %>%
    str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
head(mtxWind)
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA
table(mtxWind[, 2], useNA="ifany")
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19
table(mtxWind[, 3], useNA="ifany")
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19
table(mtxWind[, 4], useNA="ifany")
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777
# Verify that winds not captured are in fact missing from the METAR
metarKLNK[which(is.na(mtxWind[, 2])), "metar"]
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
metarKLNK <- metarKLNK %>%
    mutate(dirW=mtxWind[, 2], 
           spdW=as.numeric(mtxWind[, 3]), 
           gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
           )

# Plot for the wind direction
metarKLNK %>%
    ggplot(aes(x=dirW)) + 
    geom_bar() + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="# Hourly Observations", x="Wind Direction"
         )

# Plot for the minimum, average, and maximum wind speed by wind direction
# Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
metarKLNK %>%
    filter(!is.na(dirW)) %>%
    group_by(dirW) %>%
    summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
    ggplot(aes(x=dirW)) + 
    geom_point(aes(y=meanWind), color="red", size=2) + 
    geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="Wind Speed [KT]", x="Wind Direction"
         )

# Plot for the wind speed
# Roughly 10% of the time, there is no wind in Lincoln
metarKLNK %>%
    ggplot(aes(x=spdW)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    labs(title="Roughly 10% of wind speeds in Lincoln, NE measure 0 Knots", subtitle="KLNK METAR (2016)", 
         y="% Hourly Observations", x="Wind Speed {KT]"
         )
## Warning: Removed 19 rows containing non-finite values (stat_count).

metarKLNK %>% 
    filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
    mutate(dirW=as.numeric(dirW)) %>%
    group_by(dirW, spdW) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=spdW, y=dirW)) + 
    geom_point(alpha=0.1, aes(size=n)) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Wind Speed", x="Wind Speed [KT]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)
## Warning: Removed 4 rows containing missing values (geom_point).

Example #13: Extracting Key Elements from METAR

A properly formatted METAR includes the following information in order, though with variable amounts of other information in between.

dddd54Z ddddd[Gdd]KT dSM [M]dd/[M]dd Adddd RMK SLPddd Tdddddddd

  • dddd54Z is the two-digit date and four-digit Zulu time (KLNK METAR are taken at 54 minutes past the hour)
  • ddddd[Gdd]KT is the three-digit wind direction (can be VRB), two digit wind speed in knots, and sometimes the two digit maximum gust in knots
  • dSM is the visibility in statute miles. This is somewhat tricky in that d can be any of 0-10 but can also be 1/4, 1/2, 3/4, 1 1/4, 1 1/2, 1 3/4, 2 1/2
  • [M]dd/[M]dd is the temperature in celsius and dewpoint in celsius. M means negative
  • Adddd is the four-digit altimeter reading
  • RMK notes that the remarks are beginning
  • SLPddd notes the three-digit sea-level pressure
  • Tdddddddd notes the four digit temperature in Celsius and the four digit dewpoint in celsius. If it begins with 1 it is negative. The fourth digit is the decimal. It will always be a Celsius reading that best corresponds to integer degrees of Fahrenheit

Example code includes:

metAll <- metarKLNK %>%
    pull(metar)

# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Find the number of matching elements
str_detect(metAll, pattern=valMet) %>% table()
## .
## FALSE  TRUE 
##    23  8790
# The strings that do not match have errors in the raw data (typically, missing wind speed)
metAll[!str_detect(metAll, pattern=valMet)]
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
# A matrix of string matches can be obtained
mtxParse <- str_match(metAll, pattern=valMet)
head(mtxParse)
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
# Create a data frame
dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE)
names(dfParse) <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                    "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                    )
dfParse <- tibble::as_tibble(dfParse)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  11 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : chr  "05" "00" "00" "03" ...
##  $ WindGust  : chr  NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: chr  "10SM" "10SM" "10SM" "10SM" ...
##  $ TempC     : chr  "M03" "M03" "M03" "M03" ...
##  $ DewC      : chr  "M07" "M07" "M07" "M06" ...
##  $ Altimeter : chr  "A3029" "A3030" "A3030" "A3031" ...
##  $ SLP       : chr  "SLP275" "SLP277" "SLP277" "SLP281" ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
# Convert to numeric where appropriate
dfParse <- dfParse %>%
    mutate(WindSpeed = as.integer(WindSpeed), 
           WindGust = as.numeric(WindGust), 
           Visibility = as.numeric(str_replace(Visibility, "SM", "")),
           TempC = as.integer(str_replace(TempC, "M", "-")), 
           DewC = as.integer(str_replace(DewC, "M", "-")), 
           Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
           SLP = as.integer(str_replace(SLP, "SLP", "")), 
           TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
           DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
           )
## Warning: NAs introduced by coercion
# Investigate the data
set.seed(2003211416)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  13 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
head(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
tail(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
dfParse %>% 
    sample_n(20)
## # A tibble: 20 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
# Check for NA values
colSums(is.na(dfParse))
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      TempF       DewF 
##         23         23         23         23         23         23
# Plot of counts by key metric
keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )

for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Correct for visibility
# Areas that have \\d \\d/\\dSM
sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
valSM1 <- str_replace(valSM1, "SM", "")
valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
valSM2 <- as.integer(str_sub(valSM2, 2, 2))

dfParse[sm1, "Visibility"] <- valSM1
dfParse[sm2, "Visibility"] <- dfParse[sm2, "Visibility"] + valSM2

dfParse %>% 
    count(Visibility)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
valGust <- as.integer(str_sub(valGust, 7, 8))

dfParse[gustCheck, "WindGust"] <- valGust

dfParse %>% 
    count(WindGust) %>% 
    as.data.frame
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
dfParse <- dfParse %>%
    mutate(modSLP=ifelse(dfParse$SLP < 500, 1000 + dfParse$SLP/10, 900 + dfParse$SLP/10))

dfParse %>%
    group_by(SLP, modSLP) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=SLP, y=modSLP, size=n)) + 
    geom_point(alpha=0.3)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
keyMetric <- c("WindGust", "Visibility", "modSLP")
for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #14: Relationships Between METAR Variables

Many of the METAR variables are correlated/associated to one another.

Example code includes:

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Add the date back to the file (should edit the above instead)
dfParse <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid))
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : num  12 12 12 12 12 12 12 12 12 12 ...
# Keep only complete cases and find correlations
mtxCorr <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid)) %>%
    select_at(vars(all_of(coreNum))) %>%
    filter(complete.cases(.)) %>%
    cor()

# Print the correlations and show a heatmap
mtxCorr %>%
    round(2)
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00
corrplot::corrplot(mtxCorr, method="color", title="Lincoln, NE Hourly Weather Correlations (2016)")

# Create a function for plotting two variables against each other
plotNumCor <- function(var1, var2, title=NULL) {
    if (is.null(title)) 
        { title <- paste0("Lincoln, NE (2016) Hourly Correlations of ", var1, " and ", var2) }
    p <- dfParse %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title)
    print(p)
}

# The three linear or almost linear relationships
plotNumCor("TempC", "TempF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("DewC", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Strongly and positively related
plotNumCor("TempF", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Moderately negatively correlated
plotNumCor("TempF", "Altimeter")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("TempF", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "WindSpeed")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Predict modSLP from Altimeter
lmSLP1 <- lm(modSLP ~ Altimeter, data=dfParse)
lmSLP2 <- lm(modSLP ~ Altimeter + TempF, data=dfParse)
summary(lmSLP1)
## 
## Call:
## lm(formula = modSLP ~ Altimeter, data = dfParse)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
summary(lmSLP2)
## 
## Call:
## lm(formula = modSLP ~ Altimeter + TempF, data = dfParse)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
# Plot predictions vs. actual (model 1)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred1=predict(lmSLP1)) %>%
    count(modSLP, pred1) %>%
    ggplot(aes(x=modSLP, y=pred1)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude Only as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

# Plot predictions vs. actual (model 2)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred2=predict(lmSLP2)) %>%
    count(modSLP, pred2) %>%
    ggplot(aes(x=modSLP, y=pred2)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude and Temperature as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

Example #15: Extracting Cloud Data from METAR

Cloud data is also included in the METAR, with the type of clouds being described as:

  • CLR - there are no clouds below 12,000 feet
  • VVddd - there is a vertical visibility of ddd hundred feet (cannot tell where the clouds are above that)
  • FEWddd - there are clouds with bases at ddd feet, and they obscure 25% or less of the sky
  • SCTddd - there are clouds with bases at ddd feet, and they obscure 25%-50% of the sky
  • BKNddd - there are clouds with bases at ddd feet, and they obscure 50%-99% of the sky
  • OVCddd - there is a full overcast with base at ddd feet

The ceiling is considered the lowest height that is measured as any of OVC, BKN, or VV.

Example code includes:

# Extract the CLR records
mtxCLR <- str_extract_all(metarKLNK$metar, pattern=" CLR ", simplify=TRUE)
if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

# Extract the VV records
mtxVV <- str_extract_all(metarKLNK$metar, pattern="VV(\\d{3})", simplify=TRUE)
if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)

# Extract the FEW records
mtxFEW <- str_extract_all(metarKLNK$metar, pattern="FEW(\\d{3})", simplify=TRUE)
numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

# Extract the SCT records
mtxSCT <- str_extract_all(metarKLNK$metar, pattern="SCT(\\d{3})", simplify=TRUE)
numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

# Extract the BKN records
mtxBKN <- str_extract_all(metarKLNK$metar, pattern="BKN(\\d{3})", simplify=TRUE)
numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

# Extract the OVC records
mtxOVC <- str_extract_all(metarKLNK$metar, pattern="OVC(\\d{3})", simplify=TRUE)
numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

# Summarize as a data frame
tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                            numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                            )

# Get the counts
# As expected, if isCLR then nothing else, and if isVV then nothing else
tblClouds %>% 
    count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
    as.data.frame()
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
# Investigate the problem data
metarKLNK$metar[rowSums(tblClouds, na.rm=TRUE)==0]
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
# Get the counts of most obscuration
tblClouds %>%
    filter(rowSums(., na.rm=TRUE) > 0) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  )
           ) %>%
    ggplot(aes(x=wType, y=..count../sum(..count..))) + 
    geom_bar() + 
    labs(title="Highest Obscuration by Cloud - Lincoln, NE (2016)", x="Cloud Type", 
         y="Proportion of Hourly Measurements"
         )

# Integrate the clouds data
mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)

# Cycle through to find levels of a given type
ckClouds <- function(cloudType) {
    isKey <- which(apply(mtxCloud, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtxCloud[, min(isKey)], cloudType, "")) * 100
}
lowOVC <- ckClouds("OVC")
lowVV <- ckClouds("VV")
lowBKN <- ckClouds("BKN")
lowSCT <- ckClouds("SCT")
lowFEW <- ckClouds("FEW")

# Integrate the lowest cloud type by level
lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
lowCloud
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows
# Get the lowest cloud level
minCloud <- lowCloud
minCloud[is.na(minCloud)] <- 999999
minCloudLevel <- apply(minCloud, 1, FUN=min)
minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

noCloudPct <- mean(minCloudLevel == 999999)
noCeilingPct <- mean(minCeilingLevel == 999999)

# Plot the minimum cloud level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCloudLevel != 999999) %>%
    ggplot(aes(x=minCloudLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Cloud Height (when some clouds exist)", 
         subtitle="Lincoln, NE (2016)"
         )

# Plot the minimum ceiling level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCeilingLevel != 999999) %>%
    ggplot(aes(x=minCeilingLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Ceiling Height (when a ceiling exists)", 
         subtitle="Lincoln, NE (2016)"
         )

Example #16: Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Integrate the cloud data and convert month to a factor
dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  ), 
           month=factor(month, levels=1:12, labels=month.abb)
           )
dfFull <- tibble::as_tibble(dfFull)
str(dfFull)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  28 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
# Run the boxplot of a factor against a numeric variable
plotFactorNumeric <- function(fctVar, numVar, title=NULL) {
    if (is.null(title)) { title <- paste0("Lincoln, NE (2016) Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- dfFull %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title)
    print(p)
}

# Run for all of the key variables against wind speed and cloud type
keyVar <- c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF")
for (var in keyVar) { plotFactorNumeric("month", var) }

for (var in keyVar) { plotFactorNumeric("wType", var) }

# Create stacked bars for cloud type by month
dfFull %>%
    filter(!is.na(wType), wType!="Error") %>%
    ggplot(aes(x=month, fill=wType)) + 
    geom_bar(position="fill") + 
    labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")

Example #17: Functional Form - METAR download and initial wind processing

Example 12 can be converted to functional form so that the process can be applied to other reporting stations and time periods.

Example code includes:

# Function to make an initial read of the data, filter to METAR records, and check date-times
readMETAR <- function(fileName="./RInputFiles/metar_klnk_2016.txt", timeZ="54Z",
                      expMin=as.POSIXct("2015-12-31 00:54:00", tz="UTC"), expDays=365
                      ) {

    # Load METAR data
    initRead <- readr::read_csv(fileName, na=c("", "NA", "M"))
    str(initRead, give.attr=FALSE)

    # Filter to only data that ends with times ending in 54Z
    filterRead <- initRead %>%
        filter(str_detect(metar, timeZ))
    dim(filterRead)

    # Check that the dates and times included are as expected
    expDate <- expMin + lubridate::hours(0:(24*expDays - 1))
    
    # Observations expected but not recorded
    cat("\n*** OBSERVATIONS EXPECTED BUT NOT RECORDED ***\n")
    print(as.POSIXct(setdiff(expDate, filterRead$valid), origin="1970-01-01", tz="UTC"))

    # Observations recorded but not expected
    cat("\n*** OBSERVATIONS RECORDED BUT NOT EXPECTED ***\n")
    print(as.POSIXct(setdiff(filterRead$valid, expDate), origin="1970-01-01", tz="UTC"))

    # Confirmation of uniqueness
    cat("\n*** Are the extracted records unique? ***\n")
    print(length(unique(filterRead$valid)) == length(filterRead$valid))
    cat("\n")
    
    # Return the dataset as a tibble
    tibble::as_tibble(filterRead)
}
funcMETAR <- readMETAR(expDays=368)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
funcMETAR
## # A tibble: 8,813 x 29
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 19 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)
extractWind <- function(met) {

    mtxWind <- met %>%
        pull(metar) %>%
        str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
    cat("\n*** First 6 winds and parsing ***\n")
    print(head(mtxWind))

    cat("\n*** Table of WIND DIRECTION ***\n")
    print(table(mtxWind[, 2], useNA="ifany"))
    cat("\n*** Table of WIND SPEED ***\n")
    print(table(mtxWind[, 3], useNA="ifany"))
    cat("\n*** Table of WIND GUST ***\n")
    print(table(mtxWind[, 4], useNA="ifany"))

    # Verify that winds not captured are in fact missing from the METAR
    cat("\n *** WIND DATA WAS NOT CAPTURED FROM: *** \n")
    print(met[which(is.na(mtxWind[, 2])), "metar"])
    cat("\n")

    met %>%
        mutate(dirW=mtxWind[, 2], 
               spdW=as.numeric(mtxWind[, 3]), 
               gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
               )
}
windMETAR <- extractWind(funcMETAR)
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
windMETAR
## # A tibble: 8,813 x 32
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 22 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>, dirW <chr>, spdW <dbl>, gustW <dbl>
# Generate basic wind plots
basicWindPlots <- function(met, desc="Lincoln, NE", gran="KLNK METAR (2016)") {

    # Plot for the wind direction
    wDir <- met %>%
        ggplot(aes(x=dirW)) + 
        geom_bar() + 
        labs(title=paste0(desc, " Wind Direction"), subtitle=gran, 
             y="# Hourly Observations", x="Wind Direction"
             )
    print(wDir)

    # Plot for the minimum, average, and maximum wind speed by wind direction
    # Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
    wSpeedByDir <- met %>%
        filter(!is.na(dirW)) %>%
        group_by(dirW) %>%
        summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
        ggplot(aes(x=dirW)) + 
        geom_point(aes(y=meanWind), color="red", size=2) + 
        geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
        labs(title=paste0(desc, " Wind Speed (Max, Mean, Min) By Wind Direction"), subtitle=gran, 
             y="Wind Speed [KT]", x="Wind Direction"
             )
    print(wSpeedByDir)

    # Plot for the wind speed
    pctZero <- sum(met$spdW==0, na.rm=TRUE) / length(met$spdW)
    wSpeed <- met %>%
        ggplot(aes(x=spdW)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        labs(title=paste0(round(100*pctZero), "% of wind speeds in ", desc, " measure 0 Knots"), 
             subtitle=gran, 
             y="% Hourly Observations", x="Wind Speed {KT]"
             )
    print(wSpeed)

    wPolarDirSpeed <- met %>% 
        filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
        mutate(dirW=as.numeric(dirW)) %>%
        group_by(dirW, spdW) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=spdW, y=dirW)) + 
        geom_point(alpha=0.1, aes(size=n)) + 
        coord_polar(theta="y") + 
        labs(title=paste0(desc, " Direction vs. Wind Speed"), subtitle=gran, x="Wind Speed [KT]") + 
        scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
        scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
        geom_point(aes(x=0, y=0), color="red", size=2)
    print(wPolarDirSpeed)
}
basicWindPlots(windMETAR)

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).

Example #18: Functional Form for Extracting Key Elements from METAR

METAR parsing can also be converted to a functional form. This will need to be modified to be more general, since the codes used for a few things like clouds can vary from station to station.

Example code includes:

# Code for the initial METAR parsing
initialParseMETAR <- function(met, val, labs) {
    
    # Pull the METAR data
    metAll <- met %>%
        pull(metar)
    
    # Find the number of matching elements
    cat("\n*** Tentative Summary of Element Parsing *** \n")
    str_detect(metAll, pattern=val) %>% 
        table() %>%
        print()

    # The strings that do not match have errors in the raw data (typically, missing wind speed)
    cat("\n*** Data Not Matched *** \n")
    print(metAll[!str_detect(metAll, pattern=val)])

    # A matrix of string matches can be obtained
    mtxParse <- str_match(metAll, pattern=val)
    cat("\n*** Parsing matrix summary *** \n")
    print(dim(mtxParse))
    print(head(mtxParse))

    # Create a data frame
    dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE) %>%
        mutate(dtime=met$valid, origMETAR=met$metar)
    names(dfParse) <- c(labs, "dtime", "origMETAR")
    dfParse <- tibble::as_tibble(dfParse)
    cat("\n*** Summary of the parsed data *** \n")
    glimpse(dfParse)
    
    dfParse
}


# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Create the names for the search string to parse in to
labsMet <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
             "TempC", "DewC", "Altimeter", "SLP", "FahrC"
             )

# Run the METAR parsing on the raw data
initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
initMETAR
## # A tibble: 8,813 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC  Altimeter SLP  
##    <chr> <chr>   <chr>     <chr>    <chr> <chr>      <chr> <chr> <chr>     <chr>
##  1 54Z ~ 300     05        <NA>     " "   10SM       M03   M07   A3029     SLP2~
##  2 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  3 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  4 54Z ~ 280     03        <NA>     " "   10SM       M03   M06   A3031     SLP2~
##  5 54Z ~ 310     05        <NA>     " "   10SM       M03   M07   A3033     SLP2~
##  6 54Z ~ 010     09        <NA>     " "   10SM       M06   M10   A3033     SLP2~
##  7 54Z ~ 000     00        <NA>     " "   10SM       M07   M11   A3034     SLP2~
##  8 54Z ~ 010     03        <NA>     " "   10SM       M08   M11   A3034     SLP2~
##  9 54Z ~ 000     00        <NA>     " "   10SM       M10   M12   A3034     SLP2~
## 10 54Z ~ 000     00        <NA>     " "   10SM       M09   M12   A3035     SLP2~
## # ... with 8,803 more rows, and 3 more variables: FahrC <chr>, dtime <dttm>,
## #   origMETAR <chr>
# Helper function for generating plots by key variables
plotcountsByMetric <- function(df, mets) {
    
    # Plot of counts by key metric
    for (x in mets) {
        p <- df %>%
            group_by_at(vars(all_of(x))) %>%
            summarize(n=n()) %>%
            ggplot(aes_string(x=x, y="n")) + 
            geom_col() + 
            labs(title=x, y="Count")
        print(p)
    }
}


# Code for the conversion of METAR to meaningful numeric
# Should make this much more general later
convertMETAR <- function(met, metrics, seed=2003211416) {
    
    # Convert to numeric where appropriate
    dfParse <- met %>%
        mutate(WindSpeed = as.integer(WindSpeed), 
               WindGust = as.numeric(WindGust), 
               Visibility = as.numeric(str_replace(Visibility, "SM", "")),
               TempC = as.integer(str_replace(TempC, "M", "-")), 
               DewC = as.integer(str_replace(DewC, "M", "-")), 
               Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
               SLP = as.integer(str_replace(SLP, "SLP", "")), 
               TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
               DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
               )

    # Investigate the data
    cat("\n *** Parsed data structure, head, tail, and random sample *** \n")
    str(dfParse)
    print(head(dfParse))
    print(tail(dfParse))
    set.seed(seed)
    dfParse %>% 
        sample_n(20) %>%
        print()

    # Check for NA values
    cat("\n *** Number of NA values *** \n")
    print(colSums(is.na(dfParse)))

    # Plot of counts by key metric
    plotcountsByMetric(dfParse, mets=metrics)
    
    # Return the parsed dataset
    dfParse
}

keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )
convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
## Warning: NAs introduced by coercion
## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Address the visibility issues
getVisibility <- function(curMet, origMet) {
    
    # Get the original METAR data
    metAll <- origMet %>%
        pull(metar)
    
    # Correct for visibility
    # Areas that have \\d \\d/\\dSM
    sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
    sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

    valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
    valSM1 <- str_replace(valSM1, "SM", "")
    valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

    valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
    valSM2 <- as.integer(str_sub(valSM2, 2, 2))

    curMet[sm1, "Visibility"] <- valSM1
    curMet[sm2, "Visibility"] <- curMet[sm2, "Visibility"] + valSM2

    curMet %>% 
        count(Visibility) %>%
        print()
    
    curMet
}
parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
getWindGusts <- function(curMet, origMet) {

    metAll <- origMet %>%
        pull(metar)
    
    gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
    valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
    valGust <- as.integer(str_sub(valGust, 7, 8))

    curMet[gustCheck, "WindGust"] <- valGust

    curMet %>% 
        count(WindGust) %>% 
        as.data.frame %>%
        print()
    
    curMet
}
parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
fixSLP <- function(curMet) {

    dfParse <- curMet %>%
        mutate(modSLP=ifelse(curMet$SLP < 500, 1000 + curMet$SLP/10, 900 + curMet$SLP/10))

    p <- dfParse %>%
        group_by(SLP, modSLP) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=SLP, y=modSLP, size=n)) + 
        geom_point(alpha=0.3)
    print(p)
    
    dfParse
}
parseMETAR <- fixSLP(parseMETAR)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #19: Functional Form For Relationships Between METAR Variables

The relationships between METAR variables can also be converted to functional form.

Example code includes:

# Function to calculate, display, and plot variable correlations
corMETAR <- function(met, numVars, subT="") {

    # Keep only complete cases and report on data kept
    dfUse <- met %>%
        select_at(vars(all_of(numVars))) %>%
        filter(complete.cases(.))
    
    nU <- nrow(dfUse)
    nM <- nrow(met)
    myPct <- round(100*nU/nM, 1)
    cat("\n *** Correlations use ", nU, " complete cases (", myPct, "% of ", nM, " total) ***\n", sep="")
    
    # Create the correlation matrix
    mtxCorr <- dfUse %>%
        cor()

    # Print the correlations
    mtxCorr %>%
        round(2) %>%
        print()

    # Display a heat map
    corrplot::corrplot(mtxCorr, method="color", title=paste0("Hourly Weather Correlations\n", subT))
}

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Run the correlations function
corMETAR(parseMETAR, numVars=coreNum)
## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

# Create a function for plotting two variables against each other
plotNumCor <- function(met, var1, var2, title=NULL, subT="") {
    if (is.null(title)) 
        { title <- paste0("Hourly Correlations of ", var1, " and ", var2) }
    p <- met %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title, subtitle=subT)
    print(p)
}

var1List <- c("TempC", "DewC", "Altimeter", "TempF", "TempF",     "TempF",  "Altimeter")
var2List <- c("TempF", "DewF", "modSLP",    "DewF",  "Altimeter", "modSLP", "WindSpeed")

for (n in 1:length(var1List)) {
    plotNumCor(parseMETAR, var1List[n], var2List[n])
}
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Function for linear regressions on METAR data
lmMETAR <- function(met, y, x, yName, subT="Lincoln, NE (2016) Hourly METAR") {
    
    # Convert to formula
    myChar <- paste0(y, " ~ ", x)
    cat("\n *** Regression call is:", myChar, "***\n")
    
    # Run regression
    regr <- lm(formula(myChar), data=met)
    
    # Summarize regression
    print(summary(regr))
    
    # Predict the new values
    pred <- predict(regr, newdata=met)
    
    # Plot the predictions
    p <- met %>%
        select_at(vars(all_of(y))) %>%
        mutate(pred=pred) %>%
        group_by_at(vars(all_of(c(y, "pred")))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=y, y="pred")) + 
        geom_point(aes(size=n), alpha=0.25) + 
        geom_smooth(aes(weight=n), method="lm") + 
        labs(title=paste0("Predicted vs. Actual ", yName, " - ", x, " as Predictor"), 
             subtitle=subT, x=paste0("Actual ", yName), y=paste0("Predicted ", yName)
             )
    print(p)
}

lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

Example #20: Functional Form for Extracting Cloud Data from METAR

Cloud data can also be extracted using the functional form.

Example code includes:

extractClouds <- function(met, metVar, subT="Lincoln, NE (2016) Hourly METAR") {

    metAll <- met %>%
        pull(metVar)
    
    # Extract the CLR records
    mtxCLR <- str_extract_all(metAll, pattern=" CLR ", simplify=TRUE)
    if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
    isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

    # Extract the VV records
    mtxVV <- str_extract_all(metAll, pattern="VV(\\d{3})", simplify=TRUE)
    if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
    isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
    htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)

    # Extract the FEW records
    mtxFEW <- str_extract_all(metAll, pattern="FEW(\\d{3})", simplify=TRUE)
    numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the SCT records
    mtxSCT <- str_extract_all(metAll, pattern="SCT(\\d{3})", simplify=TRUE)
    numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the BKN records
    mtxBKN <- str_extract_all(metAll, pattern="BKN(\\d{3})", simplify=TRUE)
    numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the OVC records
    mtxOVC <- str_extract_all(metAll, pattern="OVC(\\d{3})", simplify=TRUE)
    numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

    # Summarize as a data frame
    tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                                numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                                )

    # Get the counts
    cat("\n*** Counts by number of layers of each cloud type ***\n")
    tblClouds %>% 
        count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
        as.data.frame() %>%
        print()

    # Investigate the problem data
    cat("\n*** METAR records where no clouds were extracted ***\n")
    metAll[rowSums(tblClouds, na.rm=TRUE)==0] %>%
        print()
    
    # Plot the counts of most obscuration
    p <- tblClouds %>%
        filter(rowSums(., na.rm=TRUE) > 0) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            )
               ) %>%
        ggplot(aes(x=wType, y=..count../sum(..count..))) + 
        geom_bar() + 
        labs(title="Highest Obscuration by Cloud", subtitle=subT, 
             x="Cloud Type", y="Proportion of Hourly Measurements"
             )
    print(p)
    
    # Integrate the clouds data
    mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)
    cat("\n*** Dimensions for the cloud matrix ***\n")
    print(dim(mtxCloud))
    
    list(tblClouds=tblClouds, mtxCloud=mtxCloud)
}

# Run the initial cloud extraction
initClouds <- extractClouds(parseMETAR, metVar="origMETAR")
## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
str(initClouds)
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
# Cycle through to find levels of a given type
ckClouds <- function(cloudType, mtx) {
    isKey <- which(apply(mtx, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtx[, min(isKey)], cloudType, "")) * 100
}


# Function to create the lowest cloud levels
findLowestClouds <- function(mtxCloud, subT="Lincoln, NE (2016) Hourly METAR") {

    # Find the lowest clouds by cloud type
    lowOVC <- ckClouds("OVC", mtx=mtxCloud)
    lowVV <- ckClouds("VV", mtx=mtxCloud)
    lowBKN <- ckClouds("BKN", mtx=mtxCloud)
    lowSCT <- ckClouds("SCT", mtx=mtxCloud)
    lowFEW <- ckClouds("FEW", mtx=mtxCloud)

    # Integrate the lowest cloud type by level
    lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
    cat("\n*** Lowest clouds by type tibble ***\n")
    print(lowCloud)

    # Get the lowest cloud level
    minCloud <- lowCloud
    minCloud[is.na(minCloud)] <- 999999
    minCloudLevel <- apply(minCloud, 1, FUN=min)
    minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

    noCloudPct <- mean(minCloudLevel == 999999)
    noCeilingPct <- mean(minCeilingLevel == 999999)

    # Plot the minimum cloud level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCloudLevel != 999999) %>%
        ggplot(aes(x=minCloudLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Cloud Height (when some clouds exist)", subtitle=subT
             )
    print(p)

    # Plot the minimum ceiling level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCeilingLevel != 999999) %>%
        ggplot(aes(x=minCeilingLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Ceiling Height (when a ceiling exists)", subtitle=subT
             )
    print(p)
    
    list(lowCloud=lowCloud, minCeilingLevel=minCeilingLevel, minCloudLevel=minCloudLevel)
}

processedClouds <-findLowestClouds(initClouds$mtxCloud)
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

str(processedClouds)
## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...

Example #21: Functional Form for Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Function to bind the existing parsed METAR data with the cloud data
bindMETAR <- function(dfParse, tblClouds, lowCloud) {

    # Integrate the cloud data and convert month to a factor
    dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            ), 
               month=factor(lubridate::month(dtime), levels=1:12, labels=month.abb)
               )
    
    dfFull <- tibble::as_tibble(dfFull)
    str(dfFull)
    
    dfFull
}

fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                       tblClouds=initClouds$tblClouds, 
                       lowCloud=processedClouds$lowCloud
                       )
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
# Updated function for plotting numeric by factor
plotFactorNumeric <- function(met, fctVar, numVar, title=NULL, subT) {
    if (is.null(title)) { title <- paste0("Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- met %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title, subtitle=subT)
    print(p)
}

# Function for creating cloud plots
makeFactorPlots <- function(met, 
                            fctVar=c("month", "wType"), 
                            keyVar=c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF"), 
                            desc="Lincoln, NE (2016) Hourly METAR"
                            ) {

    # Run for all of the key variables against wind speed and cloud type
    for (varF in fctVar) {
        for (varK in keyVar) { 
            plotFactorNumeric(met, fctVar=varF, numVar=varK, subT=desc) 
        }
    }

    # Create stacked bars for cloud type by month
    # dfFull %>%
    #     filter(!is.na(wType), wType!="Error") %>%
    #     ggplot(aes(x=month, fill=wType)) + 
    #     geom_bar(position="fill") + 
    #     labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")
}

makeFactorPlots(fullMETAR)

Example #22: Combining Functional Forms for METAR Processing

The functions can be combined in to a single routine for reading, parsing, and running EDA on METAR data..

Example code includes:

# Function to run the full process
runAllMETAR <- function(fname, timeZ, expMin, expDays, locMET, shortMET, longMET, valMet, 
                        labsMet=c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                                  "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                                  ), 
                        keyMetric=c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
                                    "DewC", "Altimeter", "SLP", "TempF", "DewF"
                                    ), 
                        coreNum=c("TempC", "TempF", "DewC", "DewF", 
                                  "Altimeter", "modSLP", "WindSpeed", "Visibility"
                                  ), 
                        var1List=c("TempC", "DewC", "Altimeter", "TempF", "TempF", "TempF", "Altimeter"), 
                        var2List=c("TempF", "DewF", "modSLP", "DewF", "Altimeter", "modSLP", "WindSpeed")
                        ) {
    
    # Read in the METAR data
    funcMETAR <- readMETAR(fileName=fname, timeZ=timeZ, expMin=expMin, expDays=expDays)
    # funcMETAR

    # Extract wind data from METAR
    windMETAR <- extractWind(funcMETAR)
    # windMETAR

    # Run basic wind plots
    basicWindPlots(windMETAR, desc=locMET, gran=shortMET)

    # Run the METAR parsing on the raw data
    initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
    # initMETAR

    # Parse and convert the METAR data
    convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
    # convMETAR

    # Fix problems with visibility, wind gusts, and SLP
    parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
    parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
    parseMETAR <- fixSLP(parseMETAR)

    # Check updated plots
    plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))

    # Run the correlations function
    corMETAR(parseMETAR, numVars=coreNum, subT=longMET)

    # Plot correlations
    for (n in 1:length(var1List)) {
        plotNumCor(parseMETAR, var1List[n], var2List[n], subT=longMET)
    }

    # Run lm models for SLP vs Altimeter and (optionally) Temperature
    lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure", subT=longMET)
    lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure", subT=longMET)

    # Run the initial cloud extraction
    initClouds <- extractClouds(parseMETAR, metVar="origMETAR", subT=longMET)
    str(initClouds)

    # Find the lowest cloud levels and lowest ceilings
    processedClouds <-findLowestClouds(initClouds$mtxCloud, subT=longMET)
    str(processedClouds)

    # Bind the processed METAR and the cloud data
    fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                           tblClouds=initClouds$tblClouds, 
                           lowCloud=processedClouds$lowCloud
                           )

    # Create box plots for key weather elements against month and cloud type
    makeFactorPlots(fullMETAR, desc=longMET)
    
    # Return all of the elements
    list(fullMETAR=fullMETAR, funcMETAR=funcMETAR, windMETAR=windMETAR, 
         initMETAR=initMETAR, convMETAR=convMETAR, parseMETAR=parseMETAR, 
         initClouds=initClouds, processedClouds=processedClouds
         )
}


# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_klnk_2016.txt"  # file name for raw METAR data
timeZ <- "54Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:54:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Lincoln, NE"  # Description of city or location
shortMET <- "KLNK METAR (2016)"  # Station code and timing
longMET <- "Lincoln, NE Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
klnk2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(klnk2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  30 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ isCLR     : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV      : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT    : int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ lowVV     : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN    : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  29 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_logical(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_logical(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  32 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ dirW             : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ spdW             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ gustW            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  13 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : chr [1:8813] "05" "00" "00" "03" ...
##   ..$ WindGust  : chr [1:8813] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: chr [1:8813] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8813] "M03" "M03" "M03" "M03" ...
##   ..$ DewC      : chr [1:8813] "M07" "M07" "M07" "M06" ...
##   ..$ Altimeter : chr [1:8813] "A3029" "A3030" "A3030" "A3031" ...
##   ..$ SLP       : chr [1:8813] "SLP275" "SLP277" "SLP277" "SLP281" ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  15 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  16 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   .. ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8813 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   .. ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##   ..$ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...